### Replication Package for "Why is Intermediating Houses so Difficult? Evidence from iBuyers"
### Buchak, Matvos, Piskorski, and Seru
###
###
### buchak@stanford.edu

### Creates summary stats tables and figures.

library(data.table)
library(ggplot2)
library(lfe)
library(Hmisc)
library(zoo)
library(stargazer)
library(scales)


source('0_helper_functions.r')

Table_1_Panel_A <- function() {
  # Builds Table 1, Panel A: summary statistics (N, mean, S.D. and the
  # 5/25/50/75/95th percentiles) of sale price, land square footage and house
  # age for three transaction groups in 2013-2018 (iBuyer is the buyer, iBuyer
  # is the seller, neither), plus the seller tenure of iBuyer sales held for
  # fewer than 4*365 days. The table is written to ../out/tables/1A.html.
  #
  # Takes no arguments; reads its input through loadData() (defined in
  # 0_helper_functions.r). Returns whatever stargazer() returns.

  data <- loadData()

  # Row positions of each group. which() drops rows where the condition is NA,
  # matching how data.table treats NA in a logical `i` expression.
  idx_buys <- data[, which(year %in% 2013:2018 & iBuyer.buyer == 1 & iBuyer.seller == 0)]
  idx_sales <- data[, which(year %in% 2013:2018 & iBuyer.buyer == 0 & iBuyer.seller == 1)]
  idx_others <- data[, which(year %in% 2013:2018 & iBuyer.buyer == 0 & iBuyer.seller == 0)]
  idx_hold <- data[, which(year %in% 2013:2018 & iBuyer.seller == 1 & seller.tenure < 4 * 365)]

  # One table row: the nine summary statistics of `column` over the rows `idx`.
  summary_row <- function(label, column, idx) {
    if (!column %in% names(data)) {
      stop("column '", column, "' not found in data", call. = FALSE)
    }
    x <- data[[column]][idx]
    # na.rm is spelled out in full; the previous `na.r` only worked through
    # R's partial argument matching.
    q <- quantile(x, c(0.05, 0.25, 0.50, 0.75, 0.95), na.rm = TRUE, names = FALSE)
    data.table(
      type = label,
      N = sum(!is.na(x)),
      m = mean(x, na.rm = TRUE),
      sd = sd(x, na.rm = TRUE),
      p5 = q[1], p25 = q[2], p50 = q[3], p75 = q[4], p95 = q[5]
    )
  }

  ## Summary stats re: home characteristics for iBuyers.
  # Rows are ordered variable-by-variable, and within each variable as
  # buys / sales / all others.
  variables <- c('Sale price' = 'saleamount',
                 'Land sq ft' = 'land_sqft',
                 'House age'  = 'house.age')
  groups <- list('iBuyer buys'  = idx_buys,
                 'iBuyer sales' = idx_sales,
                 'All others'   = idx_others)

  characteristic_rows <- unlist(
    lapply(names(variables), function(v) {
      lapply(names(groups), function(g) {
        summary_row(paste0(v, ': ', g), variables[[v]], groups[[g]])
      })
    }),
    recursive = FALSE
  )
  hold_row <- summary_row('iBuyer hold days', 'seller.tenure', idx_hold)

  PanelA <- rbindlist(c(characteristic_rows, list(hold_row)))

  setnames(PanelA, c('Variable', 'N', 'Mean', 'S.D.', '5%', '25%', '50%', '75%', '95%'))
  stargazer(PanelA, summary = FALSE, type = 'html', out = '../out/tables/1A.html')

}

Figure_1_Panels_ACD <- function() {
  # Draws the Figure 1 panels from transactions in 2013-2018 and saves each as
  # a 6 x 3.5 inch, 300 dpi PNG under ../out/figures/:
  #   1A.png - mean of the `iBuyer` column by year and Market (line chart)
  #   1C.png - kernel density of sale amounts below 1,000,000, by Type
  #   1D.png - kernel density of house ages below 100, years 2017-2018, by Type
  #   1B.png - quartiles of the iBuyer buy-to-resale return by quarter
  # Despite the name, panel B is produced here as well.
  #
  # Takes no arguments; reads its input through loadData() (defined in
  # 0_helper_functions.r). Every plot is handed to ggsave() explicitly instead
  # of relying on ggplot2's implicit last_plot() state.

  data <- loadData()

  data <- data[year %in% 2013:2018]

  # Market share by year
  # presumably `iBuyer` is a 0/1 indicator, so its mean is a share - confirm
  share <- data[, j = list(s = mean(iBuyer)), by = c('year', 'Market')]
  fig_1a <- ggplot(share) +
    geom_line(aes(x = year, y = s, group = Market, color = Market)) +
    scale_y_continuous(labels = percent) +
    theme_bw() + xlab('Year') + ylab('Market share (%)') +
    theme(legend.position = c(.15, .65))
  ggsave('../out/figures/1A.png', plot = fig_1a, width = 6, height = 3.5, units = 'in', dpi = 300)

  # Density of sale prices (data is already restricted to 2013-2018 above)
  data[, Type := 'Other']
  data[iBuyer == 1, Type := 'iBuyer']
  fig_1c <- ggplot(data[saleamount < 1000000]) +
    geom_density(aes(x = saleamount, group = Type, fill = Type), alpha = .25, bw = 10000) +
    theme_bw() + theme(legend.position = c(.75, .75)) +
    scale_x_continuous(label = unit_format(unit = 'k', scale = 1e-3)) +
    ylab('Density') + xlab('Sale amount ($k)')
  ggsave('../out/figures/1C.png', plot = fig_1c, height = 3.5, width = 6, units = 'in', dpi = 300)

  # Density of house ages, restricted to 2017-2018
  fig_1d <- ggplot(data[house.age < 100 & year %in% 2017:2018]) +
    geom_density(aes(x = house.age, group = Type, fill = Type), alpha = .25, bw = 2.5) +
    theme_bw() + theme(legend.position = c(.75, .75)) +
    scale_x_continuous(label = comma) +
    ylab('Density') + xlab('House age (years)')
  ggsave('../out/figures/1D.png', plot = fig_1d, height = 3.5, width = 6, units = 'in', dpi = 300)


  # Make chart of PnL year-by-year

  # Get completed transactions: pair each transaction with the next one
  # recorded for the same parcel id (pclidirisfrmtd).
  # NOTE(review): rows are ordered by quarter only, so two transactions of the
  # same parcel within one quarter keep the order loadData() returned them in -
  # confirm that order is chronological.
  data <- data[order(pclidirisfrmtd, qtr)]
  data[, buy.price  := saleamount]
  data[, sale.price := shift(saleamount, 1, type = 'lead'), by = 'pclidirisfrmtd']
  data[, sale.qtr   := shift(qtr, 1, type = 'lead'), by = 'pclidirisfrmtd']
  data[, sale.date  := shift(date, 1, type = 'lead'), by = 'pclidirisfrmtd']

  # Keep only rows with a later transaction that falls on a different date.
  completed <- data[!is.na(sale.price) & sale.date != date]
  completed[, pnl := sale.price / buy.price - 1]
  completed <- completed[pnl != 0] # get rid of zero pnl observations (NA pnl rows are dropped too)
  completed[, Type := 'Individual']
  completed[iBuyer.buyer == 1, Type := 'iBuyer']
  completed[iBuyer.buyer == 0 & corporateindicator == 'Y', Type := 'Other corporate']

  # Plot the time series: quartiles of the percentage return by purchase
  # quarter and buyer type. `completed` is already limited to 2013-2018.
  agg <- completed[, j = list(p25 = quantile(100 * pnl, .25),
                              p50 = quantile(100 * pnl, .5),
                              p75 = quantile(100 * pnl, .75)),
                   by = c('qtr', 'Type')]
  # Only the iBuyer series is drawn, for quarters strictly after 2014 Q3.
  fig_1b <- ggplot(agg[Type == 'iBuyer' & qtr > as.yearqtr('2014 Q3')]) +
    geom_line(aes(x = qtr, y = p50)) +
    geom_line(aes(x = qtr, y = p25), linetype = 'dashed') +
    geom_line(aes(x = qtr, y = p75), linetype = 'dashed') +
    theme_bw() + geom_hline(yintercept = 0, linetype = 'longdash') +
    theme(legend.position = NULL) + xlab(NULL) + ylab('iBuyer spread (%)')
  ggsave('../out/figures/1B.png', plot = fig_1b, height = 3.5, width = 6, units = 'in', dpi = 300)


}


# Run it: build Table 1 Panel A first, then the Figure 1 panels.
# Each call loads its data through loadData() and writes its output under ../out/.
Table_1_Panel_A()
Figure_1_Panels_ACD()




